##replication code for table 2 ##

setwd("C:/SBA NEW/");

library(bunching)


a=25000;
b=40500;
c=0;
zstar=25000;
x0=500;
binwidth=x0;

y<-c(4:6);

bpdlbunching<-lapply(seq(1:3),function (j) {
y0<-y[j];


bins_excl_r <- 1
         
            firstpass_prep <- bunching::prep_data_for_fit(binned_data14200,zstar = zstar, binwidth = x0,
                          bins_l =a/x0, bins_r = b/x0, rn=c(5000,10000), bins_excl_l=c/x0,bins_excl_r=bins_excl_r,poly=y0)
            firstpass <- bunching::fit_bunching(firstpass_prep$data_binned, 
                firstpass_prep$model_formula,binwidth=x0,notch=TRUE)
            B_below <- firstpass$B_zl_zstar
            M_above <- -firstpass$B_zstar_zu
                       if (M_above > B_below) {
                stop("Missing mass above zstar is larger than bunching mass below. Are you sure this is a notch?")
            }
            available_bins <- (max(firstpass_prep$data_binned$bin) - 
                zstar)/binwidth
            DoF_remaining <- nrow(firstpass_prep$data_binned) - 
                nrow(firstpass$coefficients) - 1
            notch_iterations_bound <- min(available_bins, DoF_remaining)
            zu_bin <- bins_excl_r
            tmp_firstpass_prep <- firstpass_prep
            while ((B_below > M_above) & (zu_bin < notch_iterations_bound)) {
                zu_bin_final <- zu_bin
                zu_bin <- zu_bin + 1
                newvar <- paste0("bin_excl_r_", zu_bin)
                tmp_firstpass_prep$data_binned[[newvar]] <- ifelse(tmp_firstpass_prep$data_binned$z_rel == 
                  zu_bin, 1, 0)
      tmp_firstpass_prep$model_formula <- stats::as.formula(paste(Reduce(paste, 
                  deparse(tmp_firstpass_prep$model_formula)), 
                  newvar, sep = " + "))

   tmp_firstpass <- bunching::fit_bunching(tmp_firstpass_prep$data_binned, 
                  tmp_firstpass_prep$model_formula,binwidth=x0, notch=TRUE)

                B_below <- tmp_firstpass$B_zl_zstar
                M_above <- -tmp_firstpass$B_zstar_zu
                if (B_below > M_above) {
                  firstpass_prep <- tmp_firstpass_prep
                  firstpass <- tmp_firstpass
                  zu_bin_final <- zu_bin
                }
            }
     
mb<-zu_bin*x0+zstar

 B <- tmp_firstpass$B_zl_zsta/sum(binned_data14200$freq)
      


 ratio=(1-zstar/(mb));
beta<-1/3;
alpha1<-1/3;
R<-1+0.04
    cost1=((1/alpha1)*(1-(1-ratio)^(alpha1))-ratio)*R;
nu=0.83
alpha2<-beta*nu/(1-(1-beta)*nu)
cost2=((1/alpha2)*(1-(1-ratio)^(alpha2))-ratio)*R;

  forbootpass<- tmp_firstpass;
forbootprep<- tmp_firstpass_prep;


    # retrieve data and model from firstpass_prep
     data_for_boot <- forbootprep$data_binned
    model <- forbootprep$model_formula

   residuals <- forbootpass$residuals;

n_boot<-1000;

    # get vector of bootstrapped betas
    boot_results <- lapply(seq(1:n_boot), function(i) {
        # adjust frequencies using residual
        data_for_boot$freq <- data_for_boot$freq_orig+  sample(residuals, replace = TRUE)
        # make this "freq" so we can pass to fit_bunching which requires "freq ~ ..."
      
 bins_excl_r <- 1

  boot_firstpass_prep <- bunching::prep_data_for_fit(data_for_boot,zstar = zstar, binwidth = x0,
                          bins_l =a/x0, bins_r = b/x0, rn=c(5000,10000),bins_excl_l=c/x0,bins_excl_r=bins_excl_r,poly=y0)
            boot_firstpass <- bunching::fit_bunching(boot_firstpass_prep$data_binned, 
                boot_firstpass_prep$model_formula,binwidth=x0, notch=TRUE)
    
    # extract bunching mass below and missing mass above zstar
    B_below <-   boot_firstpass$B_zl_zstar
    M_above <- -boot_firstpass$B_zstar_zu
 
    # if(M_above > B_below) == TRUE, we start shifting zu bins up until B = M.
    # first, must set upper bound on number of iterations.
    # bins above zstar cannot be more than min of:
    #   1. available bins
    #   2. remaining DoF

       if (M_above > B_below) {
  Bestimate<-   B_below;
Mestimate<- M_above
    booted_zU_notch=0;
    ratio=(1-zstar/(zstar+booted_zU_notch*x0));
     cost1=((1/alpha1)*(1-(1-ratio)^(alpha1))-ratio)*R;
cost2=((1/alpha2)*(1-(1-ratio)^(alpha2))-ratio)*R;



tempresult<-cbind(booted_zU_notch,Bestimate,ratio,cost1,cost2,indicator,dif)
     } else{                      
    available_bins <- (max(boot_firstpass_prep$data_binned$bin) - zstar)/binwidth
    DoF_remaining <- nrow(boot_firstpass_prep$data_binned) -  nrow(boot_firstpass$coefficients) - 1
    notch_iterations_bound <- min(available_bins, DoF_remaining)
    
    # start with first bin above being bins_excl_r = 1
    zu_bin <- bins_excl_r
    
    # create temporary object for firstpass_prep (we will be editing this as we expand the window for zu_bin)
    tmp_firstpass_prep <- boot_firstpass_prep
    
    while ((B_below > M_above) & (zu_bin < notch_iterations_bound)) {
        # keep previous zu_bin in case next gives us B > M
        zu_bin_final <- zu_bin
        # now try expanding by 1 bin
        zu_bin <- zu_bin + 1
        # add next bin_excl_r dummy as column to data
        newvar <- paste0("bin_excl_r_", zu_bin)
        tmp_firstpass_prep$data_binned[[newvar]]  <- ifelse(tmp_firstpass_prep$data_binned$z_rel == zu_bin,1,0)
        # add next order bin_excl_r to formula
         tmp_firstpass_prep$model_formula <- stats::as.formula(paste(Reduce(paste, deparse(tmp_firstpass_prep$model_formula)), newvar, sep = " + "))
        # re-fit model using the now expanded zu
        tmp_firstpass <- bunching::fit_bunching(tmp_firstpass_prep$data_binned, tmp_firstpass_prep$model_formula,binwidth=x0,  notch=TRUE)
        # get new B below and M above
        B_below <- tmp_firstpass$B_zl_zstar
        M_above <- -tmp_firstpass$B_zstar_zu
        
        # if M is not larger, update firstpass with this (last iterations of loop will have M > B)
        if(B_below > M_above) {
            boot_firstpass_prep <- tmp_firstpass_prep
            boot_firstpass <- tmp_firstpass
            zu_bin_final <- zu_bin
        }
        
    }

  boot_firstpass<- tmp_firstpass;
boot_firstpass_prep<- tmp_firstpass_prep;

    # assign final zu_bin_final to bins_excl_r (used for plotting)


      

   Bestimate<-  boot_firstpass$B_zl_zstar;
Mestimate<- boot_firstpass$B_zstar_zu*(-1);
    booted_zU_notch=zu_bin;

    ratio=(1-zstar/(zstar+booted_zU_notch*x0));
     cost1=((1/alpha1)*(1-(1-ratio)^(alpha1))-ratio)*R;
cost2=((1/alpha2)*(1-(1-ratio)^(alpha2))-ratio)*R;



tempresult<-cbind(booted_zU_notch,Bestimate,ratio,cost1,cost2)
}

   
        return(tempresult)
    })



    zU_Notch_boot <-do.call(rbind,boot_results)
zU_Notch_boot<-as.data.frame(zU_Notch_boot);


zU_Notch_boot<-subset(zU_Notch_boot,zU_Notch_boot$booted_zU_notch<=50); ## we drop distant simulated marginal bunchers as they do not bunch close to the threshold but only start to bunch in the end, which is against the definition of a marginal buncher in our theory.## 


zU_bin_sd<-sd(zU_Notch_boot$booted_zU_notch);
B_sd<-sd(zU_Notch_boot$Bestimate)/sum(binned_data14200$freq);
mb_sd<-zU_bin_sd*x0
ratio_sd<-sd(zU_Notch_boot$ratio);
cost1_sd<-sd(zU_Notch_boot$cost1);
cost2_sd<-sd(zU_Notch_boot$cost2);


result<-cbind(B,B_sd,mb,mb_sd,ratio,ratio_sd,cost1,cost1_sd,cost2,cost2_sd);

return (result)

})

bpdl1420result<-do.call(rbind,bpdlbunching);
bpdl1420result<-as.data.frame(bpdl1420result)



